/// Create user-level dataset with demographics, vehicle information, and Hugo experiment status

* Load the raw Hugo user records
use "data/inputs/hugo/users", clear

* Clean up timestamps from users.
* Millisecond datetimes (%tc) must be stored as doubles: Stata's default storage
* type for -gen- is float, which only carries ~7 significant digits and would
* silently round each timestamp to the nearest couple of minutes.
gen double timestamp_usercreated = firstseenatms
	format timestamp_usercreated %tc
gen double timestamp_bind = hugobindtimestampms
	format timestamp_bind %tc
* NOTE(review): %tc counts milliseconds since 01jan1960. If the raw *ms fields are
* Unix-epoch milliseconds (since 01jan1970) an offset of mdyhms(1,1,1970,0,0,0)
* would be needed -- cannot tell from this file; confirm against the input data.

* Drop unused/unformatted timestamps/dates
drop firstseenatms createdatms lastquotetimestampms hugobindtimestampms

* Build the covariates that feed the summary statistics table, verifying along
* the way that each raw input is fully populated.

* Vehicle fields get a car_ prefix; home ZIP is shortened to zip.
foreach part in year make model {
	rename `part' car_`part'
}
rename homezip zip

* 0/1 indicators derived from string-coded fields (each source must be non-missing).
assert !missing(gender)
generate female = (gender == "female")

assert !missing(hassr22)
generate sr22 = (hassr22 == "true")

assert !missing(licensestatus)
generate valid_license = (licensestatus == "valid")

* Calendar year in which the user record was created; kept as an expression in a
* local macro so it is evaluated in place and no helper variable is added.
local created_year year(dofc(timestamp_usercreated))

* Years of driving experience at the time the user record was created.
assert !missing(driverslicensefirstissuedyear, timestamp_usercreated)
generate years_exp = `created_year' - driverslicensefirstissuedyear

* Vehicle age at the time the user record was created (driver age is only checked here).
assert !missing(age)
generate car_age = `created_year' - car_year

* The raw fields have been recoded above and are no longer needed.
drop gender hassr22 licensestatus driverslicensefirstissuedyear

* Merge with the KPIs dashboard from Hugo for users who match the experiment sample.
* keep(match master) retains every user row, whether or not it appears in the
* dashboard; the merge result is recorded in maction.
merge 1:1 id using "data/inputs/hugo/hugo_dashboard", keep(match master) gen(maction) ///
	keepusing(endstudy canceldate iscanceled retainedalltheway)

* Clean up timestamps from dashboard:
* End study is in M/D/Y h:m (two-digit year, interpreted as 20xx).
* clock() returns milliseconds and must be stored as a double; the default float
* type would round the value and shift the displayed time.
gen double timestamp_endstudy = clock(endstudy, "MD20Yhm")
	format timestamp_endstudy %tc
* canceldate is in m/d/y with no time component, so parse with date() and use %td.
gen date_canceled = date(canceldate, "MD20Y")
	format date_canceled %td

* Code up other dashboard variables (the raw dashboard strings are not dropped here):
* canceled is defined only for bound users: 0 by default, 1 if the dashboard flags
* the policy as canceled; it stays missing for everyone else.
gen canceled = .
	replace canceled = 0 if bound==1
	replace canceled = 1 if bound==1 & iscanceled=="Yes"
* covered_3m is 1 when retainedalltheway is "Control" or "Yes", 0 otherwise
* (including users with no dashboard match, whose value is empty).
gen covered_3m = inlist(retainedalltheway,"Control","Yes")==1

********************************************************************************
*** Merge MMY, VIN values

* Replace MMY with upper case for match
foreach m in make model {
	replace car_`m' = upper(car_`m')
}

* Attach make/model/year valuations; keep(1 3) retains every user row, matched or not.
merge m:1 car_make car_model car_year using ///
	"data/int/MTurk MMY Clean.dta", nogen keep(1 3)

* Some observations have no make-model-year value (40 per the original note);
* assign them the median value for their car year:
bysort car_year: egen car_value = median(makemodelyearvalue_private)
	* Fallback for model year 1986 when no within-year median is available:
	* 401 is the midpoint between the 1985 and 1987 values. The condition must test
	* car_year (numeric); the previous test car_make=="1986" compared the make
	* string to a year and therefore never matched.
	replace car_value = 401 if car_year==1986 & missing(car_value)

* Vehicle Value (Private resale value for make/model/year based on Edmund's, collected via MTurk)
* The observed value always takes precedence over the imputed median.
replace car_value = makemodelyearvalue_private if !missing(makemodelyearvalue_private)
	assert !missing(car_value) if car_year!=1969 // leave 1969 Pontiac Firebird missing...

* Flag cars whose observed private value is below the sample median.
* NOTE(review): Stata treats missing as larger than any number, so observations with
* a missing makemodelyearvalue_private are coded 0 here rather than missing -- confirm
* this is intended (car_value, which includes the imputed medians, is not used).
sum makemodelyearvalue_private, det
local median_car_value = r(p50)
gen proxy_cheapcar = makemodelyearvalue_private<`median_car_value'

label variable makemodelyearvalue_private "MMY Private"
label variable makemodelyearvalue_retail "MMY Retail"
label variable makemodelyearvalue_tradein "MMY Trade-In"
label variable mileage "Car Mileage"

save "data/int/hugo clean.dta", replace

